// .........................................................................
// Title: compustat.do
//
// Imports and cleans Compustat data
// .........................................................................

* ---------------------------------------
* Import CRSP returns
* ---------------------------------------

use "$raw/crsp/crsp_monthly.dta", clear
keep if trdstat == "A"

gen ym = mofd(date)
format ym %tm
duplicates drop 

gen shares_adj=shrout*cfacshr
gen prc_adj=abs(prc)/cfacpr

gen mkval=shares_adj*prc_adj

keep permno cusip ym mkval ret retx shares_adj prc_adj
duplicates drop 
xtset permno ym

gen lr12 =(1+l.ret)*(1+l2.ret)*(1+l3.ret)*(1+l4.ret)*(1+l5.ret)*(1+l6.ret)*(1+l7.ret)*(1+l8.ret)*(1+l9.ret)*(1+l10.ret)*(1+l11.ret)*(1+ret) 
gen lrx_12=(1+l.retx)*(1+l2.retx)*(1+l3.retx)*(1+l4.retx)*(1+l5.retx)*(1+l6.retx)*(1+l7.retx)*(1+l8.retx)*(1+l9.retx)*(1+l10.retx)*(1+l11.retx)*(1+retx) 
gen dp=lr12/lrx_12-1
gen lr24 =lr12*l12.lr12
gen lrx_24=lrx_12*l12.lrx_12
gen lr36=lr12*l12.lr12*l24.lr12
gen lrx_36=lrx_12*l12.lrx_12*l24.lrx_12

gen fr12=f12.lr12 
gen frx_12=f12.lrx_12
gen fr24=f12.lr12*f24.lr12
gen frx_24=f12.lrx_12*f24.lrx_12
drop ret*

preserve
drop permno
tempfile price1
save "`price1'"
restore

preserve
drop cusip
ren permno lpermno
*ren mkval mkval1
tempfile price2
save "`price2'"
restore

* ---------------------------------------
* Import CRSP-Compustat links
* ---------------------------------------

use "$raw/crsp/crsp_compustat_link.dta", clear
foreach var of varlist * {
    local try = lower("`var'")
    cap rename `var' `try'
}
gen order = 1 if linktype == "LC"
replace order = 2 if linktype == "LU"
replace order = 3 if linktype == "LN"
replace order = 4 if order ==.
rename linkdt datadate
bysort gvkey datadate: egen order_min = min(order)
keep if order == order_min
duplicates drop gvkey datadate, force
tempfile ccm
save "`ccm'"

* ---------------------------------------
* Compustat cleaning
* ---------------------------------------

use "$raw/compustat/compustat_northam_annual.dta", clear
keep if indfmt=="INDL" & datafmt=="STD" & popsrc=="D" & consol=="C"
merge 1:1 gvkey datadate using "`ccm'", keep (1 3) keepusing(linktype lpermno lpermco)
drop _merge

drop if at==.
drop if fyear==.
destring sic, replace

* Drop federal agencies
drop if tic=="3FNMA" //Fannie Mae
drop if tic=="3FMCC" //Freddie Mac

* Issuer CUSIP
replace cusip=substr(cusip,1,8)
gen cusip6=substr(cusip,1,6)
destring gvkey, gen (id)

* Set panel
xtset id fyear

gen ym = ym(fyear, fyr) if fyr>=6
replace ym = ym(fyear+1, fyr) if fyr<=5
gen yq = qofd(dofm(ym))
gen year = year(dofm(ym))
gen yrmo = year*100 + month(dofm(ym))

merge 1:1 cusip ym using "`price1'", keep(match master)
tab _merge
drop _merge

merge m:1 lpermno ym using "`price2'", keep(1 3 4 5) update
tab _merge
drop _merge

replace mkval = mkval/1000

xtset
gen opast = at - che   /*operating asset*/
gen opliab = at -dlc -dltt -ceq -pstk -mibt   /*operating liabilities*/  
replace opliab = at -dlc -dltt -ceq -pstk if mibt==.
gen accrual = (d.act - d.che) - (d.lct - d.dlc - d.txp) - dp   /*accruals*/

* Total liabilities
gen lt_at = lt/at

* Debt
gen dlc_debt = dlc/(dltt+dlc)
gen ds_debt = (dlc-dd1)/(dltt+dlc)
gen dlc_at = dlc/at
gen ds_at = (dlc-dd1)/at
gen ltdebtch_at = (dltt-l.dltt)/l.at

xtset
gen netdebt_at = (dltis-dltr)/l.at  
gen netdebt_tot_at = (dltt+dlc-l.dltt-l.dlc)/l.at
gen netequity_at = (prstkc-sstk)/l.at

gen debt_ebitda = (dlc+dltt)/ebitda if ebitda>0
gen ebitda_debt = ebitda/(dlc+dltt)
gen dltt_ebitda = dltt/ebitda if ebitda>0

* Change in working capital
gen drect_at = (rect-l.rect)/l.at
gen dinvt_at = (invt-l.invt)/l.at
gen dap_at = (ap-l.ap)/l.at
gen dwcap_at = dap_at - drect_at - dinvt_at

* Interest expense
xtset
gen intpaid=intpn-l.intpn
gen intpaid_at = intpaid/l.at
gen int_at = intpn/l.at
gen xint_at = xint/l.at
gen xint_debt = xint/(dlc+dltt)  
gen int_debt = intpn/(dlc+dltt)  
gen ebitda_int = ebitda/intpn 
gen ebitda_xint = ebitda/xint

* Leverage
gen lev = at/seq if seq>0
gen lev1 = (dlc+dltt)/at
gen mlev = (mkval+at-seq)/mkval

* Asset composition
gen tan_at=(ppent+invt)/at
gen ppent_at=ppent/at
gen invt_at = invt/at
gen rect_at = rect/at
gen cash_at = che/at

gen gdwl_at = gdwl/at
gen intan_at = intan/at

gen re_at = re/at
gen opast_at = opast/at
gen opliab_at = opliab/at
gen noa_at = (opast-opliab)/at
gen accrual_at = accrual/at

* Cash flows
xtset
gen ebitda_at = ebitda/l.at
gen ebit_at = ebit/l.at
gen ib_at = ib/l.at
gen cf_at = (ib+dp)/l.at
gen ni_at = ni/l.at
gen margin = ni/sale
gen emargin = ebitda/sale
gen sale_at = sale/l.at  

gen oancf_at = oancf/l.at
gen ocf_at = (oancf+xint)/l.at
replace ocf_at = (oancf)/l.at if xint==.

gen sppe_at = sppe/l.at
gen sppiv_at = sppiv/l.at
gen nopi_at = nopi/l.at
gen spi_at = spi/l.at
gen xi_at = xi/l.at

* Other variables
gen KZ =-1.002*(ni)/l.at-39.368*dvt/l.at-1.315*che/at+3.139*(dltt+dlc)/at
gen Z = 1.2*wcap/at + 1.4*re/at +3.3*ebitda/l.at +0.6*mkval/lt + sale/l.at

* Valuations
gen Q = (mkval+dlc+dltt)/at
gen mtb = mkval/ceq
gen btm = ceq/mkval

* Growth
xtset
gen debtgr = log(dlc+dltt) - log(l.dlc+l.dltt)
gen atgr = log(at/l.at)
gen empgr = log(emp/l.emp)
gen ebitdagr = log(ebitda/l.ebitda) if l.ebitda>0
gen salegr = log(sale/l.sale) if l.sale>0
gen capxgr = log(capx/l.capx)

* Investment 
gen capx_at = capx/l.at
gen xrd_at = xrd/l.at
gen xrdgr = log(xrd/l.xrd)
gen aqc_at = aqc/l.at
gen capx_ppent = capx/l.ppent
gen capx_xrd_at = (capx+xrd)/l.at
gen capx_xrd_ppent = (capx+xrd)/l.ppent

* Expenses
gen xopr_at = xopr/l.at
gen xopt_at = xintopt/l.at if fyear<2006
gen xsga_at = xsga/l.at
gen am_at = am/l.at
gen dp_at = dp/l.at

gen cogs_at = cogs/l.at
gen invtp_at = (cogs + invt - l.invt)/l.at
gen xrent_at = xrent/l.at

* Payout
gen dvt_at = dvt/l.at
gen payout_at = (dvt+prstkc)/l.at

* Winsorize
foreach item of varlist lt_at - payout_at dp {
  bysort fyear: egen tmp_ph=pctile(`item'), p(99)
  bysort fyear: egen tmp_pl=pctile(`item'), p(1)
  replace `item'=. if `item'>tmp_ph 
  replace `item'=. if `item'<tmp_pl 
  drop tmp*
}  

foreach item of varlist lr* fr*  {
  bysort fyear: egen tmp_ph=pctile(`item'), p(99)
  bysort fyear: egen tmp_pl=pctile(`item'), p(1)
  replace `item'=. if `item'>tmp_ph 
  replace `item'=. if `item'<tmp_pl 
  drop tmp*
  replace `item' = `item' - 1
}

* Quantiles
local k = 3
foreach item of varlist at sale emp margin {

    forvalues t=1/`k'{
        local h = `t'*(100/(`k'+1))
        bysort fyear: egen `item'_p`h'=pctile(`item') , p(`h')
    }

    gen `item'_tile = . 

    forvalues t=1/`k'{
        local h = `t'*(100/(`k'+1))
        local b = (`t'-1)*(100/(`k'+1))

        if `t' == 1{
            replace `item'_tile = 1 if `item'<=`item'_p`h' & `item'!=.
        }
        else {
            replace `item'_tile = `t' if `item'<=`item'_p`h' & `item'>`item'_p`b' & `item'!=.
        }
    }
    local s = (100/(`k'+1))*`k'
    replace `item'_tile = `k'+1 if `item'>`item'_p`s' & `item'!=.
    drop `item'_p*

}

save "$tmp/compustat_out_ann.dta", replace
